library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.0      ✔ stringr 1.4.0 
## ✔ readr   2.1.2      ✔ forcats 0.5.1 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
# Load the sightings-plus-weather table. The first CSV column has no header,
# so readr auto-names it `...1` (see the "New names" message below).
ufo <- read_csv(file = "UFO_and_Weather.csv")
## New names:
## Rows: 22482 Columns: 18
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (4): city, state, shape, text dbl (12): ...1, city_latitude, city_longitude,
## year, month, day, hour, temp... lgl (1): snow dttm (1): date_time
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# List every row that has an exact duplicate elsewhere in the table. Both a
# forward and a `fromLast = TRUE` pass are needed so that all copies are
# shown, not only the later ones.
#
# The auto-named `...1` column is excluded from the comparison: it looks like
# a per-row index (NOTE(review): confirm against the CSV), and a unique index
# makes every row distinct, so the check could never report anything.
# `any_of()` keeps this working if that column is ever absent.
# Re-run to refresh the printed result recorded below.
ufo_key <- ufo %>% select(-any_of("...1"))
ufo[duplicated(ufo_key) | duplicated(ufo_key, fromLast = TRUE), ]
## # A tibble: 0 × 18
## # … with 18 variables: ...1 <dbl>, city <chr>, state <chr>, date_time <dttm>,
## #   shape <chr>, text <chr>, city_latitude <dbl>, city_longitude <dbl>,
## #   year <dbl>, month <dbl>, day <dbl>, hour <dbl>, temperature <dbl>,
## #   relative_humidity <dbl>, precipitation <dbl>, snow <lgl>,
## #   wind_direction <dbl>, wind_speed <dbl>
# Number of sightings per state code, stored in a `count` column.
ufo1 <- count(ufo, state, name = "count")
# Line style (white, width 2). Not referenced by the trace below.
l <- list(color = toRGB("white"), width = 2)

# Geo layout: US-only scope, Albers USA projection, lakes drawn in white.
g <- list(
  scope = "usa",
  projection = list(type = "albers usa"),
  showlakes = TRUE,
  lakecolor = toRGB("white")
)

# Choropleth of sighting counts: locations are state codes, shaded with the
# "Purples" palette; the count is also supplied as the trace text.
fig <- plot_geo(ufo1, locationmode = "USA-states") %>%
  add_trace(
    z = ~count, text = ~count, locations = ~state,
    color = ~count, colors = "Purples"
  ) %>%
  colorbar(title = "counts") %>%
  layout(
    title = "UFO sightings in the US from 2015-2019",
    geo = g
  )

# Print the figure.
fig

# Population data source (2019 Census US population by state, via Kaggle):
# https://www.kaggle.com/datasets/peretzcohen/2019-census-us-population-data-by-state

# Load the 2019 population table; per the column specification printed below
# it has the columns STATE, POPESTIMATE2019, lat and long.
us_pop <- read_csv(file = "2019_Census_US_Population_Data_By_State_Lat_Long.csv")
## Rows: 51 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): STATE
## dbl (3): POPESTIMATE2019, lat, long
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Lookup table pairing each two-letter state code with the full state name,
# built from R's built-in `state.abb` / `state.name` vectors.
state <- data.frame(state.abb, state.name)

# Attach the two-letter code to every population row by matching on the full
# name. The built-in vectors cover the 50 states only, so the District of
# Columbia row is given its code "DC" by hand.
us_pop1 <- us_pop %>%
  left_join(state, by = c("STATE" = "state.name")) %>%
  mutate(
    state.abb = replace(state.abb, STATE == "District of Columbia", "DC")
  )

# Print the joined table.
us_pop1
## # A tibble: 51 × 5
##    STATE                POPESTIMATE2019   lat   long state.abb
##    <chr>                          <dbl> <dbl>  <dbl> <chr>    
##  1 Alabama                      4903185  32.4  -86.3 AL       
##  2 Alaska                        731545  58.3 -134.  AK       
##  3 Arizona                      7278717  33.4 -112.  AZ       
##  4 Arkansas                     3017804  34.7  -92.3 AR       
##  5 California                  39512223  38.6 -121.  CA       
##  6 Colorado                     5758736  39.7 -105.  CO       
##  7 Connecticut                  3565287  41.8  -72.7 CT       
##  8 Delaware                      973764  39.2  -75.5 DE       
##  9 District of Columbia          705749  38.9  -77.0 DC       
## 10 Florida                     21477737  30.4  -84.3 FL       
## # … with 41 more rows
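# Keep only US states (and DC): the inner join below drops every sighting
# whose state code has no match in the population table.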
# Sightings per resident: match each state's sighting count to its 2019
# population estimate, divide, and keep just the state code and the ratio.
# Being an inner join, state codes missing from `us_pop1` are dropped.
ufo2 <- inner_join(ufo1, us_pop1, by = c("state" = "state.abb")) %>%
  mutate(proportion = count / POPESTIMATE2019) %>%
  select(state, proportion)
# Line style (white, width 2). Not referenced by the trace below.
l <- list(color = toRGB("white"), width = 2)

# Geo layout: US-only scope, Albers USA projection, lakes drawn in white.
g <- list(
  scope = "usa",
  projection = list(type = "albers usa"),
  showlakes = TRUE,
  lakecolor = toRGB("white")
)

# Choropleth of sightings divided by population: locations are state codes,
# shaded with the "Reds" palette.
fig <- plot_geo(ufo2, locationmode = "USA-states") %>%
  add_trace(
    z = ~proportion, locations = ~state,
    color = ~proportion, colors = "Reds"
  ) %>%
  colorbar(title = "proportions") %>%
  layout(
    title = "UFO sightings per population in the US from 2015-2019",
    geo = g
  )

# Print the figure.
fig
# Sightings per (city, state) pair. `.groups` is left unset, so the result
# stays grouped by `city` and dplyr prints its grouping message.
summarise(group_by(ufo, city, state), n = n())
## `summarise()` has grouped output by 'city'. You can override using the
## `.groups` argument.
## # A tibble: 6,837 × 3
## # Groups:   city [5,283]
##    city       state     n
##    <chr>      <chr> <int>
##  1 Abbotsford BC        1
##  2 Abbott     TX        1
##  3 Aberdeen   MD        2
##  4 Aberdeen   NC        1
##  5 Aberdeen   SD        2
##  6 Aberdeen   WA        4
##  7 Abilene    KS        2
##  8 Abilene    TX        4
##  9 Abingdon   MD        2
## 10 Abingdon   VA        3
## # … with 6,827 more rows